Preparations in file 00.Rmd
dog_ownership_cost <- read_rds("data/dog_ownership_cost.Rds") %>%
select(-cost_compared_to_other_breeds)
length(unique(dog_ownership_cost$SSC_NAME16))[1] 187
length(unique(dog_ownership_cost$dog_breed))[1] 182
SSC <- read_rds("data/geo/SSC.Rds")
length(unique(SSC$SSC_NAME16))[1] 184
wide_cost_n <- read_rds("data/wide_cost_n.Rds")
wide_cost_p <- read_rds("data/wide_cost_p.Rds")Summarizing all dogs, and expensive only.
dog_ownership_agg <- dog_ownership_cost %>%
group_by(SSC_NAME16) %>%
summarise(dogs_total = n(),
dogs_exp = sum(expensive))
SSC %<>%
left_join(dog_ownership_agg)Areas with no URP/SEIFA but having (small amount of) dogs:
SSC_NAME16 dogs_total
1 Eagle Farm 1
2 Enoggera Reservoir 3
3 Karawatha 6
4 Lytton 1
Since these areas have no SEIFA they will be excluded.
Area with low URP and no dogs at all:
SSC_NAME16 dogs_total URP
1 England Creek NA 33
NA was assumed to mean 0 here.
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0000 0.1178 0.1366 0.1403 0.1650 0.2636 1
seifa_means <- function (seifa_index) {
myenc <- enquo(seifa_index)
SSC %>%
st_drop_geometry() %>%
group_by(!!myenc) %>%
summarize(mean = mean(dogs_exp_prop, na.rm = TRUE),
sd = sd(dogs_exp_prop, na.rm = TRUE),
p25 = quantile(dogs_exp_prop, c(0.25), na.rm = TRUE),
p50 = quantile(dogs_exp_prop, c(0.50), na.rm = TRUE),
p75 = quantile(dogs_exp_prop, c(0.75), na.rm = TRUE)) %>%
ungroup()
}
seifa_cor <- function (seifa_index) {
myenc <- enquo(seifa_index)
SSC %>%
st_drop_geometry() %>%
select(!!myenc, dogs_exp_prop) %>%
mutate_if(is.factor, as.numeric) %>%
correlation(method = "kendall")
}
seifa_plot <- function (seifa_index) {
model <- eval(substitute(lm(dogs_exp_prop ~ seifa_index,
data = SSC, na.action = na.omit)))
means <- estimate_means(model)
myenc <- enquo(seifa_index)
ggplot(SSC,
aes(x = !!myenc,
y = dogs_exp_prop,
fill = !!myenc)) +
geom_violin(alpha = 0.66) +
geom_jitter2(width = 0.05, alpha = 0.5) +
geom_line(data = means, aes(y = Mean, group = 1), size = 1) +
geom_pointrange(data = means,
aes(y = Mean, ymin = CI_low, ymax = CI_high),
size = 1,
color = "white") +
scale_fill_brewer(palette = "BrBG") +
ylab("Proportion of expensive dogs") +
theme_modern()
}seifa_means(IRSD_d)# A tibble: 10 x 6
IRSD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.161 0.0443 0.146 0.159 0.184
2 2 0.130 0.0833 0.0913 0.155 0.170
3 3 0.147 0.0258 0.129 0.140 0.160
4 4 0.147 0.0411 0.121 0.144 0.173
5 5 0.145 0.0305 0.130 0.137 0.155
6 6 0.142 0.0350 0.115 0.136 0.156
7 7 0.127 0.0213 0.113 0.128 0.136
8 8 0.148 0.0268 0.125 0.148 0.163
9 9 0.126 0.0351 0.112 0.122 0.141
10 10 0.127 0.0336 0.104 0.114 0.152
seifa_cor(IRSD_d)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IRSD_d | dogs_exp_prop | -0.19 | [-0.28, -0.10] | -3.69 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d)seifa_means(IRSD_d_orig)# A tibble: 10 x 6
IRSD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.185 0.0179 0.179 0.193 0.195
2 2 0.164 0.0605 0.138 0.161 0.179
3 3 0.142 0.0540 0.125 0.159 0.180
4 4 0.161 0.0163 0.151 0.158 0.159
5 5 0.150 0.0654 0.122 0.160 0.178
6 6 0.122 0.0813 0.0962 0.136 0.166
7 7 0.143 0.0346 0.124 0.142 0.166
8 8 0.150 0.0312 0.130 0.140 0.157
9 9 0.136 0.0321 0.112 0.131 0.149
10 10 0.134 0.0316 0.113 0.127 0.156
seifa_cor(IRSD_d_orig)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
------------------------------------------------------------------------
IRSD_d_orig | dogs_exp_prop | -0.18 | [-0.27, -0.08] | -3.31 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSD_d_orig)seifa_means(IRSAD_d)# A tibble: 10 x 6
IRSAD_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.156 0.0641 0.138 0.162 0.186
2 2 0.140 0.0689 0.130 0.156 0.173
3 3 0.143 0.0232 0.127 0.144 0.157
4 4 0.157 0.0408 0.135 0.158 0.185
5 5 0.139 0.0311 0.128 0.135 0.146
6 6 0.137 0.0262 0.115 0.129 0.154
7 7 0.136 0.0354 0.115 0.129 0.146
8 8 0.141 0.0260 0.123 0.132 0.155
9 9 0.132 0.0370 0.114 0.124 0.154
10 10 0.121 0.0315 0.101 0.112 0.127
seifa_cor(IRSAD_d)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IRSAD_d | dogs_exp_prop | -0.23 | [-0.32, -0.13] | -4.35 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d)seifa_means(IRSAD_d_orig)# A tibble: 10 x 6
IRSAD_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.185 0.0179 0.179 0.193 0.195
2 2 0.145 0.0955 0.0751 0.131 0.201
3 3 0.161 0.0248 0.143 0.161 0.179
4 4 0.138 0.0183 0.131 0.138 0.144
5 5 0.153 0.0777 0.151 0.158 0.176
6 6 0.126 0.0875 0.0641 0.159 0.177
7 7 0.157 0.0258 0.136 0.156 0.173
8 8 0.147 0.0297 0.127 0.144 0.155
9 9 0.144 0.0352 0.129 0.143 0.162
10 10 0.133 0.0319 0.112 0.127 0.149
seifa_cor(IRSAD_d_orig)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-------------------------------------------------------------------------
IRSAD_d_orig | dogs_exp_prop | -0.22 | [-0.31, -0.13] | -3.98 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IRSAD_d_orig)seifa_means(IER_d)# A tibble: 10 x 6
IER_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.162 0.0452 0.136 0.165 0.187
2 2 0.137 0.0331 0.126 0.134 0.154
3 3 0.138 0.0210 0.118 0.137 0.152
4 4 0.144 0.0410 0.115 0.129 0.152
5 5 0.125 0.0702 0.110 0.138 0.165
6 6 0.152 0.0288 0.129 0.156 0.172
7 7 0.132 0.0424 0.113 0.139 0.150
8 8 0.132 0.0256 0.115 0.129 0.153
9 9 0.137 0.0425 0.114 0.122 0.146
10 10 0.144 0.0439 0.114 0.153 0.171
seifa_cor(IER_d)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
------------------------------------------------------------------
IER_d | dogs_exp_prop | -0.08 | [-0.18, 0.02] | -1.56 | 0.118
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d)seifa_means(IER_d_orig)# A tibble: 10 x 6
IER_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.161 0.0440 0.140 0.162 0.183
2 2 0.137 0.0316 0.124 0.132 0.154
3 3 0.145 0.0387 0.117 0.137 0.163
4 4 0.134 0.0263 0.114 0.128 0.144
5 5 0.130 0.0693 0.114 0.143 0.173
6 6 0.145 0.0297 0.127 0.138 0.161
7 7 0.144 0.0340 0.128 0.141 0.154
8 8 0.129 0.0465 0.127 0.130 0.152
9 9 0.139 0.0369 0.115 0.132 0.164
10 10 0.138 0.0412 0.111 0.127 0.164
seifa_cor(IER_d_orig)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
------------------------------------------------------------------
IER_d_orig | dogs_exp_prop | -0.08 | [-0.18, 0.01] | -1.57 | 0.116
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IER_d_orig)seifa_means(IEO_d)# A tibble: 10 x 6
IEO_d mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.156 0.0646 0.133 0.165 0.188
2 2 0.166 0.0265 0.152 0.159 0.178
3 3 0.132 0.0683 0.123 0.144 0.162
4 4 0.146 0.0292 0.123 0.145 0.165
5 5 0.146 0.0385 0.130 0.147 0.162
6 6 0.153 0.0306 0.131 0.152 0.171
7 7 0.133 0.0276 0.117 0.129 0.136
8 8 0.124 0.0304 0.115 0.125 0.132
9 9 0.127 0.0209 0.113 0.127 0.132
10 10 0.119 0.0302 0.106 0.112 0.119
seifa_cor(IEO_d)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IEO_d | dogs_exp_prop | -0.30 | [-0.39, -0.21] | -5.84 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d)seifa_means(IEO_d_orig)# A tibble: 10 x 6
IEO_d_orig mean sd p25 p50 p75
<fct> <dbl> <dbl> <dbl> <dbl> <dbl>
1 1 0.153 0.0665 0.138 0.179 0.194
2 2 0.175 0.0911 0.131 0.180 0.222
3 3 0.0625 0.0884 0.0312 0.0625 0.0938
4 4 0.172 0.0393 0.145 0.165 0.188
5 5 0.168 0.0221 0.151 0.171 0.175
6 6 0.166 0.0260 0.161 0.176 0.182
7 7 0.136 0.0733 0.127 0.154 0.159
8 8 0.150 0.0320 0.132 0.148 0.162
9 9 0.147 0.0331 0.130 0.147 0.165
10 10 0.130 0.0300 0.112 0.127 0.144
seifa_cor(IEO_d_orig)# Correlation Matrix (kendall-method)
Parameter1 | Parameter2 | tau | 95% CI | z | p
-----------------------------------------------------------------------
IEO_d_orig | dogs_exp_prop | -0.27 | [-0.35, -0.17] | -4.82 | < .001***
p-value adjustment method: Holm (1979)
Observations: 183
seifa_plot(IEO_d_orig)data <-
# wide_cost_n %>%
wide_cost_p %>%
st_drop_geometry() %>%
select(akita:last_col()) %>%
as_tibble()
# View(cov(data))pca <- principal_components(data,
# standardize = FALSE,
n = "auto")
pca# Loadings from Principal Component Analysis (no rotation)
Variable | PC1 | Complexity
----------------------------------------------
akita | -0.10 | 1.00
british_bulldog | 0.21 | 1.00
dogue_de_bordeaux | 0.14 | 1.00
french_bulldog | 0.54 | 1.00
german_shepherd | -0.78 | 1.00
irish_wolfhound | -0.46 | 1.00
maltese | 0.80 | 1.00
rottweiler | -0.58 | 1.00
samoyed | 0.17 | 1.00
yorkshire_terrier | 0.35 | 1.00
chinese_crested_dog | -1.92e-03 | 1.00
chow_chow | 0.20 | 1.00
lowchen | 0.20 | 1.00
saluki | 0.07 | 1.00
pharaoh_hound | -0.22 | 1.00
st_bernard | -0.32 | 1.00
tibetan_mastiff | -0.04 | 1.00
canadian_eskimo_dog | 0.08 | 1.00
black_russian_terrier | 0.10 | 1.00
The unique principal component accounted for 13.57% of the total variance of the original data.
summary(pca)# (Explained) Variance of Components
Parameter | PC1
---------------------------------------
Eigenvalues | 2.578
Variance Explained | 0.136
Variance Explained (Cumulative) | 0.136
Variance Explained (Proportion) | 0.136
plot(pca)pca_results <- wide_cost_p %>%
st_drop_geometry() %>%
as_tibble() %>%
select(SSC_CODE16:caution) %>%
mutate(pca_raw = predict(pca)$Component_1,
pca = ntile(pca_raw, 10)) IRSD_dggplot(pca_results, aes(x = IRSD, y = pca_raw)) +
geom_point()pca == IRSD_d <lgl>
# total N=180 valid N=179 mean=0.13 sd=0.34
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 155 | 86.11 | 86.59 | 86.59
TRUE | 24 | 13.33 | 13.41 | 100.00
<NA> | 1 | 0.56 | <NA> | <NA>
IRSAD_dggplot(pca_results, aes(x = IRSAD, y = pca_raw)) +
geom_point()pca == IRSAD_d <lgl>
# total N=180 valid N=179 mean=0.14 sd=0.35
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 154 | 85.56 | 86.03 | 86.03
TRUE | 25 | 13.89 | 13.97 | 100.00
<NA> | 1 | 0.56 | <NA> | <NA>
IER_dggplot(pca_results, aes(x = IER, y = pca_raw)) +
geom_point()pca == IER_d <lgl>
# total N=180 valid N=179 mean=0.11 sd=0.32
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 159 | 88.33 | 88.83 | 88.83
TRUE | 20 | 11.11 | 11.17 | 100.00
<NA> | 1 | 0.56 | <NA> | <NA>
IEO_dggplot(pca_results, aes(x = IEO, y = pca_raw)) +
geom_point()pca == IEO_d <lgl>
# total N=180 valid N=179 mean=0.18 sd=0.38
Value | N | Raw % | Valid % | Cum. %
--------------------------------------
FALSE | 147 | 81.67 | 82.12 | 82.12
TRUE | 32 | 17.78 | 17.88 | 100.00
<NA> | 1 | 0.56 | <NA> | <NA>
data <-
wide_cost_p %>%
st_drop_geometry() %>%
select(akita:last_col()) %>%
as_tibble()n <- n_clusters(data, package = c("easystats", "NbClust", "mclust"))
n# Method Agreement Procedure:
The choice of 2 clusters is supported by 7 (33.33%) methods out of 21 (Elbow, Silhouette, Duda, Pseudot2, Beale, Mcclain, Dunn).
plot(n)rez_kmeans <- cluster_analysis(data, n = 2, method = "kmeans")
rez_kmeans# Clustering Solution
The 2 clusters accounted for 5.70% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 44 | 1127.34 | -0.36 | 0.88 | 0.07 | 1.01 | -0.69 | -0.25 | 0.12 | -0.32 | 0.31 | 0.47 | 0.21 | 0.20 | 0.53 | -0.14 | 0.07 | -0.14 | 0.11 | 0.13 | -0.07
2 | 136 | 2079.91 | 0.12 | -0.28 | -0.02 | -0.33 | 0.22 | 0.08 | -0.04 | 0.10 | -0.10 | -0.15 | -0.07 | -0.07 | -0.17 | 0.04 | -0.02 | 0.05 | -0.03 | -0.04 | 0.02
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 193.745 | 3207.255 | 0.057
# You can access the predicted clusters via 'predict()'.
plot(rez_kmeans)plot(summary(rez_kmeans))cluster_results <- wide_cost_p %>%
st_drop_geometry() %>%
as_tibble() %>%
mutate(cluster = predict(rez_kmeans))
aggregate(data = cluster_results, german_shepherd ~ cluster, mean) cluster german_shepherd
1 1 0.1226264
2 2 0.2429998
aggregate(data = cluster_results, maltese ~ cluster, mean) cluster maltese
1 1 0.5415989
2 2 0.5198849
rez_hclust <- cluster_analysis(data, n = 2, method = "hclust")
rez_hclust# Clustering Solution
The 2 clusters accounted for 4.56% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 178 | 3144.63 | 4.91e-03 | -0.02 | 6.51e-03 | 5.81e-03 | 0.01 | 6.38e-03 | 6.68e-03 | -0.01 | -0.02 | -0.04 | -0.07 | 5.26e-03 | 4.87e-03 | 2.80e-03 | -0.05 | 3.53e-03 | 2.74e-03 | 1.30e-03 | 1.97e-03
2 | 2 | 101.45 | -0.44 | 2.02 | -0.58 | -0.52 | -1.16 | -0.57 | -0.59 | 0.90 | 1.60 | 3.52 | 5.82 | -0.47 | -0.43 | -0.25 | 4.41 | -0.31 | -0.24 | -0.12 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 154.924 | 3246.076 | 0.046
# You can access the predicted clusters via 'predict()'.
plot(rez_hclust)rez_hkmeans <- cluster_analysis(data, n = 2, method = "hkmeans")
rez_hkmeans# Clustering Solution
The 2 clusters accounted for 5.82% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 178 | 3133.72 | 4.91e-03 | -0.01 | 6.51e-03 | 4.65e-03 | -8.26e-03 | 2.37e-03 | 0.02 | -0.01 | 6.15e-03 | 6.33e-03 | -0.04 | 5.26e-03 | 4.87e-03 | 2.80e-03 | -0.10 | -0.01 | -0.03 | 1.30e-03 | 1.97e-03
2 | 2 | 69.36 | -0.44 | 1.15 | -0.58 | -0.41 | 0.74 | -0.21 | -1.78 | 1.14 | -0.55 | -0.56 | 3.17 | -0.47 | -0.43 | -0.25 | 8.56 | 0.98 | 2.33 | -0.12 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 197.920 | 3203.080 | 0.058
# You can access the predicted clusters via 'predict()'.
plot(rez_hkmeans)rez_pam <- cluster_analysis(data, n = 2, method = "pam")
rez_pam# Clustering Solution
The 2 clusters accounted for 4.07% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 98 | 1581.65 | -0.01 | -0.51 | -0.11 | -0.25 | 0.26 | -0.09 | 0.04 | 0.05 | 0.03 | -7.38e-03 | -0.14 | -0.25 | -0.36 | 0.08 | -0.03 | 0.10 | 0.02 | -0.01 | -0.06
2 | 82 | 1680.86 | 0.01 | 0.61 | 0.13 | 0.30 | -0.30 | 0.11 | -0.04 | -0.05 | -0.03 | 8.82e-03 | 0.17 | 0.29 | 0.43 | -0.09 | 0.03 | -0.12 | -0.03 | 0.02 | 0.08
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 138.493 | 3262.507 | 0.041
# You can access the predicted clusters via 'predict()'.
plot(rez_pam)rez_hclust2 <- cluster_analysis(data,
n = NULL,
method = "hclust",
iterations = 500,
ci = 0.90)
rez_hclust2# Clustering Solution
The 9 clusters accounted for 7.68% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0 | 160 | 3105.01 | -0.07 | 0.05 | 0.04 | -0.02 | 0.04 | 0.06 | -0.09 | 0.05 | -1.50e-04 | 0.03 | 0.02 | 0.03 | 0.05 | 0.03 | 0.02 | 0.03 | 0.02 | 0.01 | -0.06
1 | 2 | 0.00 | -0.44 | -1.20 | -0.58 | -0.98 | 2.19 | -0.57 | -0.18 | -1.25 | -0.55 | -0.56 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | -0.18
2 | 2 | 0.21 | -0.44 | 0.53 | -0.58 | -0.12 | -0.42 | -0.57 | 0.87 | -0.07 | -0.42 | -0.04 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | -0.18
3 | 2 | 0.32 | -0.28 | -0.33 | 0.27 | -0.46 | -0.05 | -0.28 | 0.42 | 0.19 | -0.45 | -0.19 | 0.18 | 0.24 | 0.15 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | -0.18
4 | 2 | 0.77 | 0.56 | -0.21 | -0.39 | -0.83 | -0.47 | -0.38 | 1.10 | -0.21 | -0.11 | 0.05 | -0.09 | 0.27 | -0.43 | -0.25 | -0.18 | -0.31 | 0.96 | -0.12 | -0.18
5 | 2 | 1.12 | 0.98 | 0.83 | 1.02 | 0.02 | -0.47 | 0.01 | -0.05 | 0.47 | -0.11 | -0.44 | 1.48 | -0.47 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | -0.18
6 | 2 | 1.58 | 3.59 | -0.49 | -0.58 | -0.22 | 0.36 | -0.57 | -0.23 | -0.08 | 2.46 | -0.56 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | -0.18
7 | 2 | 4.47 | 2.59 | -0.74 | -0.58 | -0.73 | -0.28 | -0.41 | 0.87 | -0.06 | -0.19 | -0.20 | -0.42 | 0.22 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | 6.45
8 | 6 | 26.31 | -0.44 | -0.71 | -0.58 | 1.61 | -1.27 | -0.57 | 1.56 | -0.89 | -0.21 | -0.02 | -0.42 | -0.47 | -0.43 | -0.25 | -0.18 | -0.14 | -0.24 | -0.12 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 261.207 | 34.778 | 0.077
# You can access the predicted clusters via 'predict()'.
plot(rez_hclust2)eps <- n_clusters_dbscan(data, min_size = 0.01)
epsThe DBSCAN method, based on the total clusters sum of squares, suggests that the optimal eps = 10.1333352087498 (with min. cluster size set to 2), which corresponds to 0 clusters.
plot(eps)rez_dbscan <- cluster_analysis(data, method = "dbscan", dbscan_eps = 5)
rez_dbscan# Clustering Solution
The 2 clusters accounted for 2.82% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0 | 38 | 2138.96 | 0.35 | 0.12 | 0.09 | 0.21 | 0.32 | 0.12 | -0.72 | 0.05 | 0.26 | 0.41 | 0.27 | 0.02 | -6.68e-03 | 0.35 | 0.28 | 0.42 | 0.33 | 0.37 | 0.49
1 | 142 | 1166.13 | -0.09 | -0.03 | -0.02 | -0.06 | -0.09 | -0.03 | 0.19 | -0.01 | -0.07 | -0.11 | -0.07 | -4.45e-03 | 1.79e-03 | -0.09 | -0.07 | -0.11 | -0.09 | -0.10 | -0.13
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 95.918 | 1166.126 | 0.028
# You can access the predicted clusters via 'predict()'.
plot(rez_dbscan)rez_hdbscan <- cluster_analysis(data, method = "hdbscan")
rez_hdbscan# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
0 | 180 | 3401 | 3.67e-17 | 7.76e-17 | -6.48e-17 | -3.38e-17 | -5.59e-17 | -4.32e-17 | -6.44e-17 | -4.31e-17 | 2.14e-17 | 3.45e-17 | -3.28e-17 | -9.67e-18 | -2.51e-17 | -1.17e-17 | -7.91e-18 | -8.96e-18 | 1.19e-18 | -7.56e-18 | -4.30e-18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 0.000 | 0.000 | 0.000
# You can access the predicted clusters via 'predict()'.
# plot(rez_hdbscan)rez_pamk <- cluster_analysis(data, method = "pamk")
rez_pamk # Clustering Solution
The 10 clusters accounted for 38.96% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 63 | 494.10 | 0.07 | -0.43 | -0.07 | -0.31 | 0.08 | -0.12 | 0.30 | 0.01 | -0.12 | -0.12 | -0.09 | -0.38 | -0.33 | -0.17 | -0.07 | -0.02 | -0.01 | -0.12 | -0.18
10 | 1 | 0.00 | -0.44 | 3.51 | -0.58 | -0.98 | -1.09 | -0.57 | -1.20 | 3.05 | -0.55 | -0.56 | 6.76 | -0.47 | -0.43 | -0.25 | 9.00 | -0.31 | -0.24 | -0.12 | -0.18
2 | 10 | 108.05 | -0.32 | 0.34 | -0.31 | 2.83 | -1.07 | -0.50 | 0.43 | -0.85 | -0.31 | 0.50 | 0.06 | -0.47 | 0.15 | -0.12 | -0.18 | -0.21 | -0.24 | -0.12 | 0.66
3 | 46 | 496.40 | 0.08 | 0.50 | 0.36 | 8.09e-03 | -0.22 | 0.13 | -7.48e-03 | 0.09 | -0.23 | -0.12 | 0.16 | -0.21 | 0.80 | -0.07 | -0.03 | -0.15 | -0.14 | -0.12 | -0.12
4 | 12 | 358.09 | -0.06 | -0.46 | -0.42 | -0.76 | 2.40 | 1.38 | -2.65 | 1.26 | -0.44 | -0.56 | -0.25 | -0.30 | -0.43 | -0.25 | -0.18 | 1.10 | -0.24 | -0.12 | -0.18
5 | 35 | 470.89 | -0.15 | 0.24 | -0.11 | 0.07 | -0.35 | -0.23 | 0.23 | -0.28 | 0.86 | 0.46 | -0.11 | 1.22 | -0.26 | -0.07 | -0.14 | -0.09 | -0.21 | -0.05 | -0.18
6 | 4 | 99.41 | -0.44 | 0.42 | -0.20 | 0.14 | 0.44 | 0.18 | -0.59 | -0.50 | -0.55 | -0.13 | -0.42 | 0.17 | -0.12 | -0.25 | 1.89 | 0.59 | 5.65 | -0.12 | -0.18
7 | 4 | 17.65 | -0.44 | -0.77 | 0.13 | -0.19 | -0.09 | -0.06 | 0.37 | -0.39 | -7.94e-03 | -0.05 | -0.42 | -0.04 | -0.43 | 5.65 | -0.18 | -0.05 | -0.24 | -0.12 | -0.18
8 | 3 | 20.47 | 1.58 | -0.89 | 0.10 | -0.51 | -0.16 | -0.34 | 0.52 | -0.11 | -0.07 | 0.27 | 0.16 | -8.46e-03 | 0.12 | -0.25 | -0.18 | -0.31 | -0.24 | -0.12 | 6.65
9 | 2 | 10.91 | 0.30 | -0.69 | 0.21 | 0.11 | -0.20 | -0.39 | 0.73 | -0.48 | -0.23 | -0.56 | 0.36 | -0.47 | -0.43 | -0.25 | -0.18 | -0.31 | -0.24 | 9.14 | -0.18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 1325.018 | 2075.982 | 0.390
# You can access the predicted clusters via 'predict()'.
plot(rez_pamk)p_load(mclust)
rez_mixture <- cluster_analysis(data, method = "mixture")
rez_mixture# Clustering Solution
The unique cluster accounted for 0.00% of the total variance of the original data.
Cluster | n_Obs | Sum_Squares | akita | british_bulldog | dogue_de_bordeaux | french_bulldog | german_shepherd | irish_wolfhound | maltese | rottweiler | samoyed | yorkshire_terrier | chinese_crested_dog | chow_chow | lowchen | saluki | pharaoh_hound | st_bernard | tibetan_mastiff | canadian_eskimo_dog | black_russian_terrier
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
1 | 180 | 3401 | 3.67e-17 | 7.76e-17 | -6.48e-17 | -3.38e-17 | -5.59e-17 | -4.32e-17 | -6.44e-17 | -4.31e-17 | 2.14e-17 | 3.45e-17 | -3.28e-17 | -9.67e-18 | -2.51e-17 | -1.17e-17 | -7.91e-18 | -8.96e-18 | 1.19e-18 | -7.56e-18 | -4.30e-18
# Indices of model performance
Sum_Squares_Total | Sum_Squares_Between | Sum_Squares_Within | R2
--------------------------------------------------------------------
3401.000 | 0.000 | 3401.000 | 0.000
# You can access the predicted clusters via 'predict()'.
plot(rez_mixture)list_of_results <- list(rez_kmeans, rez_hclust, rez_hkmeans, rez_pam,
rez_hclust2, rez_dbscan, rez_hdbscan, rez_mixture)
probability_matrix <- cluster_meta(list_of_results)
heatmap(probability_matrix, scale = "none",
col = grDevices::hcl.colors(256, palette = "inferno"))R version 4.1.2 (2021-11-01) Platform: x86_64-w64-mingw32/x64 (64-bit) Running under: Windows 10 x64 (build 18363) Matrix products: default attached base packages: [1] stats graphics grDevices utils datasets methods base other attached packages: [1] mclust_5.4.9 parameters_0.16.0 modelbased_0.9.0 see_0.6.8 [5] correlation_0.7.1 tmap_3.3-2 sf_1.0-5 DT_0.20 [9] sjPlot_2.8.10 sjmisc_2.8.9 scales_1.1.1 magrittr_2.0.1 [13] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.7 purrr_0.3.4 [17] readr_2.1.1 tidyr_1.1.4 tibble_3.1.6 ggplot2_3.3.5 [21] tidyverse_1.3.1 pacman_0.5.1To cite R in publications use:
R Core Team (2021). R: A Language and Environment for Statistical Computing. R Foundation for Statistical Computing, Vienna, Austria. https://www.R-project.org/.
To cite the ggplot2 package in publications use:Wickham H (2016). ggplot2: Elegant Graphics for Data Analysis. Springer-Verlag New York. ISBN 978-3-319-24277-4, https://ggplot2.tidyverse.org.